#!/usr/bin/env python

import sys
import random
import urllib

def _read_lines(path):
  """Return the stripped, non-empty lines of the text file at `path`.

  The file is opened in a `with` block so the handle is closed as soon as
  reading finishes, even if an error is raised part-way through.
  """
  with open(path) as handle:
    stripped = (raw.strip() for raw in handle)
    # Blank lines (typically a trailing newline at end of file) are dropped:
    # they would otherwise become empty samples or break the two-field
    # unpacking of the keyword file.
    return [entry for entry in stripped if entry]

# One sample URI per line.
uris = _read_lines('koko-uri-sample-5000.txt')

# Each line holds exactly two whitespace-separated fields, read as
# (keyword, language). A line with any other number of fields raises
# ValueError, as before.
keywords = []
for entry in _read_lines('search-keywords-sample.txt'):
  kw, lang = entry.split()
  keywords.append((kw, lang))

# One vocabulary identifier per line.
vocabs = _read_lines('vocabs.txt')

# Command line: <count> <baseurl> [paramtype]
if len(sys.argv) < 3:
  # The first line names the script, so its %s must be interpolated; the
  # second line's %s is literal text describing the placeholder in baseurl.
  sys.stderr.write("usage: %s <count> <baseurl> [paramtype]\n" % sys.argv[0])
  sys.stderr.write("paramtype specifies what to use for %s substitutions, either 'uri', 'keyword' or 'vocab'\n")
  sys.exit(1)

# Number of URLs to emit; report a non-integer value with a clear message
# instead of a traceback (exit status stays 1).
try:
  count = int(sys.argv[1])
except ValueError:
  sys.stderr.write("count must be an integer, got %r\n" % sys.argv[1])
  sys.exit(1)

# URL template; it may contain %s placeholders filled according to paramtype.
baseurl = sys.argv[2]

# Optional substitution type; None means baseurl is printed unchanged.
if len(sys.argv) > 3:
  paramtype = sys.argv[3]
else:
  paramtype = None

def _plain_url():
  """Return baseurl unchanged (no paramtype given)."""
  return baseurl

def _uri_url():
  """Return baseurl with a random sample URI, percent-quoted, substituted."""
  return baseurl % urllib.quote(random.choice(uris))

def _keyword_url():
  """Return baseurl with a random (language, quoted keyword) pair substituted.

  baseurl must contain two %s placeholders: language first, keyword second.
  """
  kw, lang = random.choice(keywords)
  return baseurl % (lang, urllib.quote(kw))

def _vocab_url():
  """Return baseurl with a random vocabulary identifier substituted as-is."""
  return baseurl % random.choice(vocabs)

# Resolve the generator once, before the loop, so that an unknown paramtype is
# reported even when count is zero or negative, and the dispatch is not
# repeated for every generated URL.
_url_makers = {
  None: _plain_url,
  'uri': _uri_url,
  'keyword': _keyword_url,
  'vocab': _vocab_url,
}
make_url = _url_makers.get(paramtype)
if make_url is None:
  sys.stderr.write("Unknown parameter type\n")
  sys.exit(2)

# Emit one URL per line on standard output.
for _ in range(count):
  sys.stdout.write(make_url() + "\n")
